# replication code - CJPS - "place types" by Sophie Borwein and Jack Lucas
# note: this file reproduces the data and analysis in the CJPS research note
# for the place types / postal code file, use "ada_placetypes" 

library(tidyverse)
library(rio)
library(sf)
library(patchwork)
# set your working directory here 
setwd("")

# helper file for variable labels
labels <- rio::import("variable_labels.xlsx")

###############################################################################
# figure 1: summary of place types characteristics 

# mode function
find_mode <- function(x) {
  u <- unique(x)
  tab <- tabulate(match(x, u))
  u[tab == max(tab)]
}

# data
classes <- rio::import("ada_classes.dta")
ada <- rio::import("ada_raw_nomissing.dta") %>% 
  mutate(logdens = log(pop_density), h_conpost2000 = h_con0105 + h_con0610 + h_con1115 + h_con1621) %>%
  dplyr::select(geouid, logdens, age_median, dw_singledetached, married, inc_decile1, inc_decile10, vm_notvm, vm_frac, vm_indigenousID, imm_immigrant, h_conPre60, h_con6180, h_con8190, h_con9100, h_conpost2000,
                rel_christian, edu_postDegree, lab_employment, lab_noc1:lab_noc9, com_cartruck, com_transit) %>% 
  left_join(., classes, by="geouid") %>% 
  pivot_longer(cols = c(logdens:com_transit)) %>% group_by(name) %>% mutate(quantile = ntile(value, 5)) %>%
  group_by(name, c7) %>% summarise(mean_quantile = mean(quantile), mode_quantile = find_mode(quantile)) %>% 
  left_join(., labels, by="name")

# set up for plot
ada$description[ada$name=="h_conpost2000"] <- "Construction After 2000 (%)"
ada$description[ada$name=="vm_notvm"] <- "Racial Identity: White (%)"
ada$order[ada$name=="h_conpost2000"] <- 10
ada$order[ada$name=="dw_singledetached"] <- 12
ada$description[ada$name=="logdens"] <- "Population Density (Log)"
ada$category[ada$name=="h_conpost2000"] <- "Housing"
ada$category[ada$name=="dw_singledetached"] <- "Housing"
ada$category[ada$name=="vm_frac"] <- "Diversity"
ada$category[ada$name=="vm_notvm"] <- "Diversity"
ada$category[ada$name=="rel_christian"] <- "Diversity"
ada$category[ada$name=="imm_immigrant"] <- "Diversity"
ada$category[ada$name=="vm_indigenousID"] <- "Diversity"
ada$category[ada$name=="age_median"] <- "Other"
ada$category[ada$name=="logdens"] <- "Other"
ada$category[ada$name=="edu_postDegree"] <- "Other"
ada$category[ada$name=="married"] <- "Other"

# plot
ada$mode_quantile <- factor(ada$mode_quantile, labels = c("Lowest Quintile", "Second Quintile", "Middle Quintile", "Fourth Quintile", "Highest Quintile"))
ada$c7 <- factor(ada$c7, levels = c(1,2,5,3,7,6,4))
ada$c7 <- factor(ada$c7, labels = c("Rural 1\n(Low-Income Rural)", "Rural 2\n(Higher-Income Rural)", "Rural 3\n(Indigenous Rural)", "Suburban 1\n(Low-Diversity Suburban)", "Suburban 2\n(High-Diversity Suburban)", "Urban 1\n(Working-Class Urban)", "Urban 2\n(Professional-Class Urban)"))
ggplot(ada, aes(x=c7, y=reorder(description,-order), fill=factor(mode_quantile))) + 
  geom_tile() + scale_fill_manual(values=c("#a6611a", "#dfc27d", "#f7f7f7", "#80cdc1", "#018571")) + 
  ggforce::facet_col(facets = vars(category), 
                     scales = "free_y", 
                     space = "free") + 
  xlab("") + ylab("") + 
  theme_minimal() + theme(legend.position = "right", legend.title = element_blank(),
                          panel.border = element_rect(fill=NA, linewidth=0.5),
                          axis.text.x = element_text(angle=45, hjust=1, color=c("#1b9e77","#d95f02", "#a6761d", "#7570b3","#e6ab02", "#66a61e", "#e7298a")),
                          axis.text.y = element_text(color="black"),
                          strip.text = element_text(hjust=0)) +
  guides(fill=guide_legend(nrow=5, byrow=TRUE))

###############################################################################
# figure 2: map 

# import shapefile
shp <- st_read("ada_shp/lada000b21a_e.shp") %>% mutate(geouid = as.numeric(ADAUID))
shp <- left_join(shp, classes, by="geouid")
shp <- st_as_sf(shp)

# make map
map <- ggplot(shp, aes(fill=factor(c7))) + 
  geom_sf(color = "black", size=0, linewidth=0.001, show.legend=T) + 
  theme_minimal() + coord_sf(datum=NA) + 
  scale_fill_manual(values = c("#1b9e77","#d95f02", "#7570b3", "#e7298a", "#66a61e", "#e6ab02", "#a6761d"))

###############################################################################
# figures 3 and 4: interpolated data 

# interpolated data 
int <- rio::import("ada_vs_interpolated.dta") %>%
  dplyr::rename(geouid = adauid) %>% mutate(geouid = as.numeric(geouid)) %>% 
  left_join(., classes, by="geouid") %>% filter(!is.na(c7)) %>% 
  pivot_longer(cols = contains("vs_")) %>% mutate(region = substr(geouid,1,1)) %>%
  filter(name!="vs_other" & name!="vs_green")

# set up for plot
int$value[int$name=="vs_bloc" & int$region!="2"] <- NA
int <- int %>%
  group_by(name, c7) %>% mutate(meanvs = mean(value, na.rm=T))
int$name <- factor(int$name, labels = c("Bloc Quebecois", "Conservative", "Liberal", "NDP"))
int$name <- factor(int$name, levels=c("Liberal", "Conservative", "NDP", "Bloc Quebecois"))
int$ptype <- int$c7
int$ptype <- factor(int$ptype, levels = c(1,2,5,3,7,6,4))
int$ptype <- factor(int$ptype, labels=c("Rural 1", "Rural 2", "Rural 3", "Suburban 1", "Suburban 2", "Urban 1", "Urban 2"))
mean <- int %>% group_by(name, ptype) %>% slice(1)

# figure 3
ggplot(int, aes(x=value, fill=name)) + 
  geom_density() + facet_grid(ptype~name) + 
  geom_vline(aes(xintercept=meanvs)) + 
  xlab("") + ylab("") + 
  scale_y_continuous(expand=c(0,0), limits=c(0,8)) + 
  scale_x_continuous(expand=c(0,0), limits=c(0,1), labels=scales::percent) + 
  geom_text(data=mean, aes(label = scales::percent(meanvs, accuracy=1L), x=meanvs, y=6.5), hjust=-0.1) + 
  theme_minimal() + theme(legend.position = "none", panel.border = element_rect(fill=NA, linewidth=0.5),
                          axis.text.y = element_blank(),
                          strip.text.y = element_text(color = c("black", "blue", "green"))) + 
  scale_fill_manual(values = c("#e41a1c", "#377eb8", "#ff7f00", "#80b1d3"))

# calculate winner
winner <- int %>% group_by(geouid) %>% slice_max(order_by=value, n=1) %>% 
  mutate(name = car::recode(name, "'Bloc Quebecois' = 'BQ'")) %>%
  group_by(region, ptype, name) %>% 
  summarise(n = n()) %>% mutate(percent = n/sum(n)) %>% 
  filter(region!=6)
winner$region <- factor(winner$region, labels=c("ATL", "QC", "ON", "PRA", "BC"))

# figure 4
ggplot(winner, aes(x=region, y=percent, fill=name)) + 
  geom_bar(stat="identity") + xlab("") + ylab("Percentage of ADAs Won by Party") + 
  facet_grid(ptype~name, space="free_x", scales="free_x") + 
  scale_y_continuous(limits=c(0,1), expand=c(0,0), labels=scales::percent) + 
  scale_fill_manual(values = c("#e41a1c", "#ff7f00", "#377eb8","#80b1d3")) + 
  theme_minimal() + theme(legend.position = "none", panel.border = element_rect(fill=NA, linewidth=0.5),
                          panel.spacing = unit(0.8, "lines")) 

###############################################################################
# figure 5: margin of victory

# margin of victory
margin <- int %>% group_by(geouid) %>% arrange(geouid, -value) %>% mutate(margin = value-lead(value)) %>% 
  slice_max(order_by=value, n=1)

# average across place types
margin.all <- margin %>% group_by(ptype) %>% dplyr::summarise(mean = median(margin, na.rm=T)) %>% mutate(region = 0)
margin.region <- margin %>% group_by(region, ptype) %>% dplyr::summarise(mean = median(margin, na.rm=T)) %>% 
  filter(region!=6)

# plot
order <- margin.all %>% arrange(mean) %>% pull(ptype)
margin.all$ptype <- factor(margin.all$ptype, levels=order)
margin.region$ptype <- factor(margin.region$ptype, levels=order)
margin.region$region <- factor(margin.region$region, labels=c("Atlantic", "Quebec", "Ontario", "Prairie", "British Columbia"))
p1 <- ggplot(margin.all, aes(x=mean, y=ptype)) + 
  geom_point(data=margin.all, aes(x=mean, y=ptype), size=2) + 
  scale_x_continuous(labels=scales::percent) + 
  xlab("") + ylab("") + 
  theme_minimal() + theme(panel.border = element_rect(fill=NA, linewidth=0.5),
                          legend.position = "bottom",
                          legend.title = element_blank(),
                          strip.text = element_text(hjust=0)) + ggtitle("Overall")
p2 <- ggplot(margin.region, aes(x=mean, y=ptype)) + 
  geom_point() + 
  scale_x_continuous(labels=scales::percent) + 
  xlab("Median Margin of Victory") + ylab("") + facet_wrap(~region, nrow=5) + 
  theme_minimal() + theme(panel.border = element_rect(fill=NA, linewidth=0.5),
                          legend.position = "bottom",
                          legend.title = element_blank(),
                          strip.text = element_text(hjust=0)) + ggtitle("By Region")
p1 / p2 + plot_layout(heights = c(1,7))

